import requests
from lxml import etree
import pymysql
# Module-level connection and cursor shared by the scraping functions below.
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts positional
# arguments to connect(); older releases accept these keywords as well.
# NOTE(review): host and credentials are hard-coded in source - consider moving
# them to configuration or environment variables.
db = pymysql.connect(
    host="10.3.110.175",
    user="root",
    password="jinshanjf#168",
    database="fmi_0716",
)
cursor = db.cursor()

def getFgJ(provinceName, provinceUrl, city, dryRun=True):
    """Scrape all listing pages for one city and record each organisation found.

    Starting at ``provinceUrl``, each page is fetched from http://www.go007.com,
    every ``<ul>`` under ``div[@class="zfList police"]`` is read as one
    organisation (name, address, phone), and the pager link is followed until
    no further page is offered or a page repeats.

    Args:
        provinceName: Value stored in the ``province_name`` column.
        provinceUrl: Site-relative path of the first listing page. Despite the
            name, ``getCity`` in this file passes a city page path here.
        city: Value stored in the ``city`` column.
        dryRun: When true (the default) the INSERT statement and its values are
            only printed and the database is left untouched, which matches the
            previous behaviour where the execute call was disabled. When false,
            each row is inserted through the module-level ``cursor`` and
            committed on ``db``.

    Raises:
        requests.RequestException: If a page cannot be fetched within the
            timeout.
    """
    # Values are bound by the driver rather than concatenated into the SQL, so
    # quotes or other special characters in scraped text cannot break (or
    # inject into) the statement.
    insertSql = (
        'INSERT INTO `gov_organs_data` '
        '(`province_name`, `city`, `type`, `name`, `address`, `tel_phone`) '
        'VALUES (%s, %s, %s, %s, %s, %s)'
    )

    pageUrl = provinceUrl
    visited = set()

    # Iterate instead of recursing once per page; the visited set stops the
    # crawl if the pager ever links back to a page that was already processed.
    while pageUrl and pageUrl not in visited:
        visited.add(pageUrl)
        print("provinceName:" + provinceName + ",provinceUrl:" + pageUrl + ",city:" + city)
        response = requests.get("http://www.go007.com" + pageUrl, timeout=30)
        tree = etree.HTML(response.text)

        for entry in tree.xpath('//div[@class="zfList police"]/ul'):
            names = entry.xpath(".//li[1]/h3/a/text()")
            if not names:
                # Without a name there is nothing meaningful to store.
                continue
            areas = entry.xpath(".//li[2]/span[1]/text()")
            tels = entry.xpath('.//li[2]/span[2]/text()')

            # The first three characters of both spans are dropped; presumably
            # a label prefix in front of the value - verify against the page.
            address = areas[0][3:] if areas else ''
            telPhone = ''
            if tels:
                # Rewrite "%uXXXX" sequences as "\uXXXX" escapes and decode
                # them into real characters.
                telPhone = tels[0][3:].replace("%", "\\").encode("gbk").decode("unicode_escape")

            params = (provinceName, city, '3', names[0], address, telPhone)
            if dryRun:
                print(insertSql + " <- " + " | ".join(params))
                continue

            try:
                cursor.execute(insertSql, params)
                db.commit()
            except (pymysql.MySQLError, UnicodeError) as err:
                # Best effort per row: undo the failed insert, report it and
                # carry on with the next entry.
                db.rollback()
                print("insert failed for " + names[0] + ": " + repr(err))

        # The second-to-last anchor of the pager is followed as the next page.
        pagerLinks = tree.xpath('//div[@class="zfBox"]/ul[@class="UNav"]/li[1]/a[last()-1]')
        nextHrefs = pagerLinks[0].xpath('.//@href') if pagerLinks else []
        pageUrl = nextHrefs[0] if nextHrefs else None


def getCity(provinceName, provinceUrl):
    """Fetch one province page and scrape every city linked from it.

    Each anchor under ``dl[@class="place"]/dd`` is treated as a city link and
    handed to ``getFgJ``. Links to the two index pages and to the province
    page itself are skipped.

    Args:
        provinceName: Province label passed through to ``getFgJ``.
        provinceUrl: Site-relative path of the province page on
            http://www.go007.com.

    Raises:
        requests.RequestException: If the page cannot be fetched within the
            timeout.
    """
    indexUrls = ('/ditu/fangguanju_cn/', '/ditu/minzhengju_cn/')

    response = requests.get("http://www.go007.com" + provinceUrl, timeout=30)
    tree = etree.HTML(response.text)

    for link in tree.xpath('//dl[@class="place"]/dd/a'):
        labels = link.xpath(".//text()")
        hrefs = link.xpath(".//@href")
        if not labels or not hrefs:
            # Anchors without text or without an href cannot be followed.
            continue

        city, cityUrl = labels[0], hrefs[0]
        if cityUrl in indexUrls or cityUrl == provinceUrl:
            continue
        getFgJ(provinceName, cityUrl, city)

# Entry point: load the "minzhengju" index page, treat every anchor under
# dl[@class="place"]/dd as a province and crawl it with getCity().
# The alternative starting page is http://www.go007.com/ditu/fangguanju_cn/.
# The connection is closed in a finally clause so it is released even when the
# crawl aborts with an exception.
try:
    html = requests.get("http://www.go007.com/ditu/minzhengju_cn/", timeout=30)
    etree_html = etree.HTML(html.text)
    provinces = etree_html.xpath('//dl[@class="place"]/dd/a')
    for province in provinces:
        labels = province.xpath(".//text()")
        hrefs = province.xpath(".//@href")
        if not labels or not hrefs:
            # Anchors without text or without an href cannot be followed.
            continue
        provinceName = labels[0]
        provinceUrl = hrefs[0]
        # Skip the links that point back at the two index pages themselves.
        if provinceUrl not in ('/ditu/fangguanju_cn/', '/ditu/minzhengju_cn/'):
            getCity(provinceName, provinceUrl)
finally:
    db.close()